libp2p operation analysis

In [1]:
# !pip install pyhmy --upgrade
import json
import pandas as pd
import os
from os import path
import shutil
import re
import plotly.express as px 
import plotly.graph_objects as go
import numpy as np
import gzip
from IPython.core.display import display, HTML
In [2]:
def read_data(node, date_tag="05-06", log_root='/home/ubuntu/jupyter/logs/mainnet'):
    """Collect the p2p metrics records logged by one node.

    Scans ``<log_root>/<node>`` for gzip-compressed log files whose file name
    contains ``date_tag`` and parses, as JSON, every line that contains the
    ``[metrics][p2p]`` marker.

    Parameters
    ----------
    node : str
        Name of the node's sub-directory under ``log_root`` (the notebook
        passes the node's IP address).
    date_tag : str, optional
        Substring a file name must contain to be read. Defaults to ``"05-06"``.
    log_root : str, optional
        Directory that holds one sub-directory of logs per node.

    Returns
    -------
    list
        One parsed JSON value per matching log line, in file-name order and,
        within a file, in line order.

    Raises
    ------
    FileNotFoundError
        If the node's log directory does not exist.
    json.JSONDecodeError
        If a line carrying the marker is not valid JSON.
    """
    log_dir = path.join(log_root, str(node))
    marker = b'[metrics][p2p]'
    data = []
    # os.listdir() returns names in arbitrary order; sorting makes the record
    # order reproducible from run to run.
    for file in sorted(os.listdir(log_dir)):
        if date_tag not in file:
            continue
        with gzip.open(path.join(log_dir, file)) as f:
            # Stream the file line by line instead of materialising the whole
            # decompressed log with readlines().
            for line in f:
                if marker in line:
                    data.append(json.loads(line))
    return data
In [3]:
def data_processing(data, start_time, end_time):
    """Turn raw metrics records into a time-filtered, time-sorted DataFrame.

    Parameters
    ----------
    data : list
        Parsed log records (as returned by ``read_data``). Only the columns
        ``ip, TotalIn, RateIn, time, message, TotalOut, RateOut`` are kept.
    start_time, end_time
        Bounds of the window to keep. Both bounds are exclusive: a row is kept
        only when ``start_time < time < end_time``.

    Returns
    -------
    pandas.DataFrame
        Rows inside the window, sorted by ``time`` ascending, with their
        original index labels preserved.

    Side effects
    ------------
    Prints the earliest and latest timestamp found in ``data`` (before
    filtering) so the caller can see which window is actually available.
    """
    df = pd.DataFrame(data, columns = ['ip', 'TotalIn', 'RateIn', 'time', 'message', 'TotalOut', 'RateOut'])
    df['time'] = pd.to_datetime(df['time'], format = '%Y-%m-%dT%H:%M:%S.%f')

    if df.empty:
        # Nothing to report or filter; positional access on an empty frame
        # would raise IndexError.
        print("available time slot: none (no records found)")
        return df

    # Use min/max rather than the first/last row: the records are not
    # guaranteed to arrive in chronological order.
    print(f"available time slot: from {df['time'].min()} to {df['time'].max()}")

    in_window = (df['time'] > start_time) & (df['time'] < end_time)
    # sort_values returns a new frame, avoiding an in-place sort on a filtered
    # slice (which triggers SettingWithCopyWarning).
    return df[in_window].sort_values(by=['time'], ascending=True)
In [4]:
def draw_graph(df, node):
    """Plot received and sent byte totals over time for one node.

    Draws ``TotalIn`` (left axis) and ``TotalOut`` (right axis) against
    ``time``, adds a dotted vertical line at every "Reset after 1 consensus
    cycle" record, shows the figure as SVG, writes it as
    ``<html_dir><node>.html`` and displays a link to the published page.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``time``, ``message``, ``TotalIn``
        and ``TotalOut``.
    node : str
        Node identifier; used for the output file name and the link.

    Notes
    -----
    Reads the module-level ``html_dir``, which must be defined before this
    function is called and is concatenated directly with the file name.
    """
    html_path = "https://harmony-one.github.io/harmony-log-analysis/graphs/p2p/{}.html".format(node)

    # Split the records by the kind of metrics line they came from.
    message = df['message']
    received = df[message == '[metrics][p2p] traffic in in bytes']
    sent = df[message == '[metrics][p2p] traffic out in bytes']
    resets = df[message == '[metrics][p2p] Reset after 1 consensus cycle']

    received_trace = go.Scatter(
        x=received['time'],
        y=received['TotalIn'],
        mode='lines+markers',
        line={'color': '#00AEE9', 'width': 1},
        name='Received-in Bytes',
    )
    # The sent-out series is drawn against the secondary (right-hand) y axis.
    sent_trace = go.Scatter(
        x=sent["time"],
        y=sent["TotalOut"],
        mode='lines+markers',
        line={'color': '#FFA07A', 'width': 1},
        name="Sent-out Bytes",
        yaxis='y2',
    )

    layout = go.Layout(
        title='Bytes vs Time',
        xaxis_title="utc_time",
        yaxis={'title': 'Received-in Bytes'},
        yaxis2={'title': 'Sent-out Bytes', 'overlaying': 'y', 'side': 'right'},
        legend_orientation="h",
        legend={'x': 0, 'y': -0.1},
    )
    fig = go.Figure(data=[received_trace, sent_trace], layout=layout)

    # One vertical marker per reset event; y runs 0..1 because the shapes are
    # switched to paper coordinates on the y axis just below.
    for reset_time in resets['time']:
        fig.add_shape(
            type="line",
            x0=reset_time, y0=0,
            x1=reset_time, y1=1,
            line={'width': 1, 'dash': "dot"},
        )
    fig.update_shapes({'xref': 'x', 'yref': 'paper'})

    # Final legend placement (overrides the position given in the layout).
    fig.update_layout(legend_orientation="h", legend={'x': 0, 'y': -0.25})

    fig.show(renderer="svg", width=800, height=500)
    fig.write_html(html_dir + node + '.html')
    print("HTML saved in ")
    link_markup = "<a href='" + html_path + "' target='_blank'>" + html_path + "</a>"
    display(HTML(link_markup))
In [5]:
def analysis(node, start_time, end_time):
    """Run the whole pipeline for one node: read logs, filter, plot.

    Parameters
    ----------
    node : str
        Node identifier passed to ``read_data`` and ``draw_graph``.
    start_time, end_time
        Window bounds forwarded to ``data_processing``.
    """
    records = read_data(node)
    windowed = data_processing(records, start_time, end_time)
    draw_graph(windowed, node)
In [6]:
def get_df(start, end, frame=None):
    """Return rows ``start`` (inclusive) to ``end`` (exclusive) by position.

    Parameters
    ----------
    start, end : int
        Positional slice bounds, as used by ``DataFrame.iloc``.
    frame : pandas.DataFrame, optional
        Frame to slice. When omitted, the module-level ``df`` is used, which
        keeps existing ``get_df(start, end)`` calls working.

    Returns
    -------
    pandas.DataFrame
        The selected rows with all columns.
    """
    # Falling back to the global keeps the old behaviour; passing ``frame``
    # removes the dependency on hidden notebook state.
    source = df if frame is None else frame
    return source.iloc[start:end,]
In [7]:
# Directory the per-node HTML graphs are written to; draw_graph reads this
# module-level name and concatenates the file name directly onto it, so the
# trailing slash is required.
html_dir = "/home/ubuntu/jupyter/harmony-log-analysis/docs/graphs/p2p/"
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and the cell is safe to re-run.
os.makedirs(html_dir, exist_ok=True)

Shard 0 Leader Node: 34.210.74.9

In [8]:
# Ten-minute analysis window; data_processing keeps rows strictly between
# these two bounds. The same window is reused by the per-node cells below.
start_time = '2020-05-06 00:00:46'
end_time = '2020-05-06 00:10:46'
node = '34.210.74.9'
# Load and filter as separate steps so `data` and `df` remain module-level
# names; get_df and the later draw_graph(df, node) call read `df`.
data = read_data(node)
df = data_processing(data, start_time, end_time)
available time slot: from 2020-05-06 00:00:46.711483476+00:00 to 2020-05-06 00:41:47.445475770+00:00
In [9]:
# Preview rows 10-19 (positional slice) of the filtered frame `df`.
get_df(10,20)
Out[9]:
ip TotalIn RateIn time message TotalOut RateOut
10 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.827928714+00:00 [metrics][p2p] traffic in in bytes NaN NaN
11 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.833276847+00:00 [metrics][p2p] traffic in in bytes NaN NaN
12 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.834514495+00:00 [metrics][p2p] traffic in in bytes NaN NaN
13 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.834844429+00:00 [metrics][p2p] traffic in in bytes NaN NaN
14 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.836235189+00:00 [metrics][p2p] traffic in in bytes NaN NaN
15 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.837069025+00:00 [metrics][p2p] traffic in in bytes NaN NaN
16 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.838523409+00:00 [metrics][p2p] traffic in in bytes NaN NaN
17 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.838843725+00:00 [metrics][p2p] traffic in in bytes NaN NaN
18 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.840596250+00:00 [metrics][p2p] traffic in in bytes NaN NaN
19 34.210.74.9 69609.0 58080.393918 2020-05-06 00:00:46.840944170+00:00 [metrics][p2p] traffic in in bytes NaN NaN
In [10]:
# Plot the frame loaded above for node 34.210.74.9 and export its HTML graph.
draw_graph(df,node)
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k05001000150020002500Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in 

Shard 0 Validator 1: 34.242.87.85

In [11]:
# Same time window as above; read, filter and plot this node in one call.
node = '34.242.87.85'
analysis(node, start_time, end_time)
available time slot: from 2020-05-05 23:30:30.812410917+00:00 to 2020-05-06 00:58:06.685324327+00:00
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k050100150200250300Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in 

Shard 0 Validator 2: 34.244.166.68

In [12]:
# Same time window as above; read, filter and plot this node in one call.
node = '34.244.166.68'
analysis(node, start_time, end_time)
available time slot: from 2020-05-05 23:17:30.247099798+00:00 to 2020-05-06 00:44:30.130483713+00:00
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k050100150200250300Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in 

Shard 0 Validator 3: 34.254.64.112

In [13]:
# Same time window as above; read, filter and plot this node in one call.
node = '34.254.64.112'
analysis(node, start_time, end_time)
available time slot: from 2020-05-05 23:12:42.843780912+00:00 to 2020-05-06 00:38:29.119146680+00:00
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k020040060080010001200Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in 
In [14]:
# Same time window as above; read, filter and plot this node in one call.
# NOTE(review): this cell has no heading naming the node's role — confirm and add one.
node = '34.240.243.212'
analysis(node, start_time, end_time)
available time slot: from 2020-05-05 23:34:12.703299660+00:00 to 2020-05-06 00:59:59.470541919+00:00
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k050010001500Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in 
In [15]:
# Same time window as above; read, filter and plot this node in one call.
# NOTE(review): this cell has no heading naming the node's role — confirm and add one.
node = '18.202.231.246'
analysis(node, start_time, end_time)
available time slot: from 2020-05-05 23:38:35.813393419+00:00 to 2020-05-06 01:04:28.892757724+00:00
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k020040060080010001200Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in 
In [16]:
# Same time window as above; read, filter and plot this node in one call.
# NOTE(review): this cell has no heading naming the node's role — confirm and add one.
node = '34.244.240.175'
analysis(node, start_time, end_time)
available time slot: from 2020-05-05 23:55:59.308275562+00:00 to 2020-05-06 01:22:02.050109982+00:00
00:02May 6, 202000:0400:0600:0800:10020k40k60k80k100k120k140k0200400600800Received-in BytesSent-out BytesBytes vs Timeutc_timeReceived-in BytesSent-out Bytes
HTML saved in